** This Do-File replicates Figure 6 from P. Dutronc, A. Tondini "Large Means-Tested Pensions with Informal Labor Markets" ** 

set more off

* Point Stata at the folder that holds the PALMS v3.2 extract (only the
* 2002--2015 waves are needed): fill in the path between the quotes.
cd ""

use "south_africa_palms.dta", clear

*------------------------------------*
*  Select Population for Estimation  *
*------------------------------------*

* Exclude the years in which the threshold is lowered from 65 to 60
drop if inlist(year, 2008, 2009)

* Keep Black and Coloured individuals only, see text for explanation
keep if inlist(pop_group, 1, 2)

* Exclude observations with missing value for gender (system missing or code 9)
drop if sex==9 | sex==.

* 1/0 variable for marital status (1 when marital_status equals 1, 0 otherwise)
generate married = (marital_status==1)


** Adjust weights for multiple year estimation **

* Divide each weight by the mean weight of its survey year, so that the
* rescaled weights have the same mean (one) in every wave.
bysort year: egen mean_weight = mean(weight)
replace weight = weight / mean_weight

** Definition of Informal Employment **

* tot_inf = 1 informal, 0 formal or not employed, missing when it cannot be classified
generate tot_inf = .

* Employees (status 1): informal when there is no written contract
replace tot_inf = no_written_contract if status==1 & inlist(no_written_contract, 0, 1)

* Self-employed (status 2): informal when the business is not registered
replace tot_inf = not_registered if status==2 & inlist(not_registered, 0, 1)

* Not employed (status 3 or 4)
replace tot_inf = 0 if inlist(status, 3, 4)

* Exclude from the estimation those with missing informality status
drop if missing(tot_inf)

* Formal employment: employed (emp==1) and not classified as informal
generate f_emp = (emp==1 & tot_inf==0)


**************************************
************* Regressions ************
**************************************

* Indicator variable for being at or above the age-60 threshold.
* Stata orders missing values above every number, so (age>=60) alone would
* code a missing age as 1; the qualifier leaves OAP_1 missing in that case.
gen OAP_1=(age>=60) if !missing(age)

* Running variable: age centred so that the discontinuity is at 0, plus its square
gen x=age-60
gen x2=x^2

* 1 for survey years after 2008, 0 otherwise (2008 and 2009 were dropped above)
gen post=(year>2008)

* Year of birth implied by survey year and age
gen yob=year-age

* Cluster for standard errors by race*cohort group to account for repeated
* observations over time in DiDisc estimation, see text for explanation
egen cluster=group(yob pop_group)

* Categorical variable for education based on years of schooling; cut() codes
* each group by its lower bound, i.e. 0 = [0,5), 5 = [5,9), 9 = [9,18).
* NOTE(review): values of education outside [0,18) end up missing in edu_cat
* and therefore drop out of the by-education regressions - confirm intended.
egen edu_cat = cut(education), at(0 5 9 18)

* Distribution of the education categories: full sample, then weighted for
* sex==1 aged 55-64, separately for the pre (post==0) and post (post==1) periods
ta edu_cat
ta edu_cat if sex==1 & post==0 &  inrange(age, 55,64) [aw=weight]
ta edu_cat if sex==1 & post==1 &  inrange(age, 55,64) [aw=weight]

*****************

* Collect the difference-in-discontinuities coefficient 1.OAP_1#1.post and its
* clustered standard error for every combination of
*   sex    : 1, 2
*   sample : 1 = ages 40-79, 2 = ages 50-69, 3 = ages 55-64
*   spec   : 1 = linear in x, 2 = quadratic in x (x and x2)
*   V      : outcome (tot_inf, f_emp)
*   educ   : -99 = all education levels pooled, 0 / 5 / 9 = edu_cat category
* Results are written to coeffs.dta, one row per regression.
* NOTE: eststo comes from the user-written estout package (ssc install estout).

* Close a handle left open by an earlier aborted run; without this, postfile
* stops with an error because the handle name is already in use.
capture postclose coeffs
postfile coeffs coeff se str10 V educ sample sex spec using "coeffs.dta", replace

local controls i.Province i.married

foreach sex in 1 2 {
	foreach sample in 1 2 3 {
		local samplecond = cond(`sample'==1, "age >= 40 & age <80", cond(`sample'==2, "age >=50 & age < 70", "age >=55 & age <65"))

		* The age window does not depend on the functional form, so restrict
		* the data once per sample instead of once per specification.
		preserve
		keep if `samplecond'

		foreach spec in 1 2 {
			local f_form = cond(`spec'==1, "x", "x x2")

			* Right-hand side shared by the pooled and the by-education runs:
			* polynomial in x fully interacted with OAP_1 and post
			local rhs i.pop_group i.year `controls' c.(`f_form') c.(`f_form')#1.OAP_1 c.(`f_form')#1.post c.(`f_form')#1.OAP_1#1.post OAP_1 1.OAP_1#1.post

			eststo clear
			foreach v in tot_inf f_emp {
				* -99 flags the pooled regression; 0, 5 and 9 are edu_cat values
				foreach lvl in -99 0 5 9 {
					local educcond
					if `lvl' != -99 local educcond "& edu_cat ==`lvl'"

					* [w=] on regress is treated as analytic weights; spelled out here
					eststo r : reg `v' `rhs' [aw=weight] if sex==`sex' `educcond', cluster(cluster)

					* Post the estimates directly rather than via string macros
					post coeffs (_b[1.OAP_1#1.post]) (_se[1.OAP_1#1.post]) ("`v'") (`lvl') (`sample') (`sex') (`spec')
				}
			}
		}
		restore
	}
}

postclose coeffs


* Load the posted estimates and prepare them for plotting
use "coeffs.dta", clear
capture drop ub lb

* Map the education codes onto consecutive x-axis positions 1-4
recode educ (-99=1) (0=2) (5=3) (9=4)

* (Re)define the value label used for the x-axis ticks
label define educ 1 "All" 2 "0 to 4 years" 3 "5 to 8 years" 4 "9+years", replace
label values educ educ

* Bounds of the 95% confidence interval (normal approximation)
generate ub = coeff + 1.96*se
generate lb = coeff - 1.96*se

*****************************************************
** Figure 6 - Panel a (tot_inf) and Panel b (f_emp) **
*****************************************************
* Both panels plot the coefficient and its 95% CI by education group for
* sex==1, sample 3 (ages 55-64) and the linear specification (spec==1).
* The first pass of the loop draws Panel a, the second draws Panel b.

foreach outcome in tot_inf f_emp {
	twoway (scatter coeff educ, mcolor(black)) ///
	       (rcap ub lb educ, lcolor(black)) ///
	       if sample ==3 & spec==1 & sex==1 & V=="`outcome'", ///
	       graphregion(color(white)) xlabel(,valuelabel) ylabel(-0.1 (0.05) 0.15) ///
	       legend(label(1 "Coeff.") label(2 "95% CI"))
	*graph export "`outcome'_men_55_64_linear_educ.png", as(png) replace
}

* Remove the intermediate file of posted coefficients
erase coeffs.dta
